function [f_vec3,g_vec3,time_vec3,theta_vec3] = SBFW (fun_f, grad_f_y,grad_f_x,...
    grad_g_y, grad_g_yx,grad_g_yy,param,theta_0, lambda_0)
%SBFW  Frank-Wolfe style bilevel iteration with a randomized Hessian-inverse
%      estimate for the lower-level problem.
%
%   [f_vec3,g_vec3,time_vec3,theta_vec3] = SBFW(fun_f, grad_f_y, grad_f_x, ...
%       grad_g_y, grad_g_yx, grad_g_yy, param, theta_0, lambda_0)
%
%   Inputs (as they are used by this function):
%     fun_f      - function handle, called as fun_f(y); its value is logged.
%     grad_f_y   - function handle, called as grad_f_y(y).
%     grad_f_x   - numeric array (NOT called); used directly in the
%                  direction estimate.
%     grad_g_y   - function handle, called as grad_g_y(x, y).
%     grad_g_yx  - numeric array (NOT called); its transpose multiplies the
%                  Hessian-inverse estimate.
%     grad_g_yy  - numeric array (NOT called); must be conformable with
%                  eye(m), where m = length(theta_0).
%     param      - struct with fields maxiter, lg and mug.
%     theta_0    - initial lower-level variable y; logged as theta_0', so it
%                  is presumably a column vector (TODO confirm with caller).
%     lambda_0   - initial upper-level variable; only its length and its
%                  value in the first grad_g_y call are used.
%
%   Outputs (one entry/row appended per iteration):
%     f_vec3     - values of fun_f(y).
%     g_vec3     - values of norm(grad_g_y(x, y)).
%     time_vec3  - elapsed time (toc) since the start of the loop.
%     theta_vec3 - rows y'; the first row is theta_0', so it has one more
%                  row than the other outputs.

disp('SBFW Algorithm starts');
%-------------------------parameter definition-----------------------------
maxiter = param.maxiter;
%lower-level parameters
L_g = param.lg;
mu_g = param.mug;

%Initialization
x1 = lambda_0;
n = length(x1);
m = length(theta_0);
% Start the x-iterate at the first unit vector of length n. (Previously this
% was hard-coded as [1;0;0;0], which only worked for n == 4.)
x2 = zeros(n,1);
x2(1) = 1;
y = theta_0;
a0 = min(2/(3*mu_g), mu_g/ (2* L_g^2));

% Loop-invariant factor of the Neumann-series estimate: I - (1/L_g)*grad_g_yy.
M = eye(m,m) - (1/L_g)* grad_g_yy;

tic;
% --------------------------SBFW algorithm --------------------------------
iter = 1;
f_vec3 = [];
g_vec3 = [];
theta_vec3 = theta_0';
time_vec3 = [];
p = ceil((L_g/mu_g));
d = grad_f_x - (grad_g_yx'*(p/L_g)*grad_f_y(y));
h2 = d;
while iter <= maxiter
    % iter is incremented first, so the step sizes below see iter = 2, 3, ...
    % (this also keeps log(iter) > 0 so that p >= 1 for randi).
    iter = iter+1;
    % NOTE(review): at iter = 2, rho = 2/2^(2/3) > 1, so (1-rho) is negative
    % on the first pass - confirm this is intended.
    rho = 2 / (iter^(2/3));
    eta = 2/ (iter+1);
    gamma = a0 / (iter^(2/3));
    % Lower-level gradient is evaluated at x1 (the x from one update earlier;
    % lambda_0 on the first pass).
    h_g = grad_g_y(x1,y);

    h1 = h2;                 % previous direction sample
    y = y - gamma*h_g;       % update of y

    % --------------------estimation of hessian inverse--------------------
    p = ceil(2/3*(L_g/mu_g)*log(iter));
    % Draw the random number of extra factors ONCE per outer iteration.
    % (Calling randi inside the while-condition re-draws on every test and
    % biases the count towards small values.)
    k = randi(p);
    % H starts with one factor of M and gets k more, i.e. H = M^(k+1).
    % NOTE(review): confirm the intended power range against the estimator
    % this implements.
    H = M;
    for i = 1:k
        H = H*M;
    end

    h2 = grad_f_x - (grad_g_yx'*(p/L_g)*H*grad_f_y(y));

    % Recursive direction estimate: correct the old estimate by the previous
    % sample h1 and add the new sample h2.
    d = ((1-rho) * (d - h1 ))+ h2;

    % -----------------------------Find s----------------------------------
    % s is the unit vector at the smallest entry of d (first index on ties).
    [~,ind] = min(d);
    s = zeros(n,1); s(ind)=1;
    %----------------------------------------------------------------------
    x = (1-eta)*x2 + eta*s; % update of x

    cpu_t3 = toc;
    f_vec3 = [f_vec3;fun_f(y)];
    g_vec3 = [g_vec3;norm(grad_g_y(x,y))];
    theta_vec3 = [theta_vec3;y'];
    time_vec3 = [time_vec3;cpu_t3];

    % Shift the x history: x1 <- previous x, x2 <- current x.
    x1 = x2;
    x2 = x;
end

end
    